"""Load Data from .csv files."""
from dataclasses import dataclass, field
from pathlib import Path
from typing import List

import pandas as pd

from ethicml.utility import DataTuple

from .dataset import Dataset, LegacyDataset

# Names exported by ``from <module> import *``. ``ConfigurableDataset`` is not listed
# here; instances of it are obtained through ``create_data_obj``.
__all__ = ["load_data", "create_data_obj"]


def load_data(dataset: Dataset) -> DataTuple:
    """Load the given dataset by delegating to its own ``load`` method.

    Kept purely for backwards compatibility; new code should call ``dataset.load()``
    directly.

    :param dataset: dataset object to load
    :returns: the DataTuple produced by ``dataset.load()``
    """
    loaded = dataset.load()
    return loaded


@dataclass(kw_only=True)
class ConfigurableDataset(LegacyDataset):
    """A dataset whose feature list is derived from a CSV file at construction time.

    The CSV file is read in ``__post_init__``. Every column that is neither the sensitive
    attribute column, the label column, nor listed in ``additional_to_drop`` is registered
    as a feature. No feature is declared continuous (``cont_features`` is passed as an
    empty list to the parent initialiser).

    :param filepath_: path to the CSV file
    :param s_column: name of the column used as the sensitive attribute
    :param y_column: name of the column used as the class label
    :param additional_to_drop: further columns to exclude from the features (Default: empty)
    """

    filepath_: Path
    s_column: str
    y_column: str
    additional_to_drop: List[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Read the CSV file and initialise the parent dataset from its columns.

        :raises ValueError: if ``s_column`` and ``y_column`` name the same column, or if any
            of ``s_column``, ``y_column`` or ``additional_to_drop`` is not a column of the file
        """
        dataframe: pd.DataFrame = pd.read_csv(self.filepath_)
        columns: list[str] = [str(x) for x in dataframe.columns.to_numpy().tolist()]

        # One column cannot serve as both sensitive attribute and label.
        if self.s_column == self.y_column:
            raise ValueError(
                f"s_column and y_column must be different columns, "
                f"but both are '{self.s_column}'."
            )

        # Validate up front so that the error names the offending column(s) and the file,
        # rather than surfacing as an opaque "list.remove(x): x not in list".
        excluded = [self.s_column, self.y_column, *self.additional_to_drop]
        missing = [col for col in dict.fromkeys(excluded) if col not in columns]
        if missing:
            raise ValueError(
                f"Column(s) {missing} not found in '{self.filepath_}'. "
                f"Available columns: {columns}."
            )

        excluded_set = set(excluded)
        features = [col for col in columns if col not in excluded_set]

        super().__init__(
            name=self.filepath_.name,
            num_samples=len(dataframe),
            features=features,
            cont_features=[],
            sens_attr_spec=self.s_column,
            class_label_spec=self.y_column,
            filename_or_path=self.filepath_,
        )


def create_data_obj(
    filepath: Path, s_column: str, y_column: str, additional_to_drop: list[str] | None = None
) -> ConfigurableDataset:
    """Build a `ConfigurableDataset` for the given CSV file.

    :param filepath: path to a CSV file
    :param s_column: column that represents sensitive attributes
    :param y_column: column that contains labels
    :param additional_to_drop: other columns that should be dropped (Default: None)
    :returns: Dataset object
    """
    # ``None`` means "nothing extra to drop"; a supplied list is passed through as-is.
    extra_columns = additional_to_drop if additional_to_drop is not None else []
    return ConfigurableDataset(
        filepath_=filepath,
        s_column=s_column,
        y_column=y_column,
        additional_to_drop=extra_columns,
    )
